library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(viridis)
## Loading required package: viridisLite
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(hms)

library(httr)
## 
## Attaching package: 'httr'
## The following object is masked from 'package:plotly':
## 
##     config
library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
# Chunk defaults for the knitted report: echo the code, suppress warnings,
# and give every figure the same width, aspect ratio, and output width.
knitr::opts_chunk$set(
  out.width = "90%",
  fig.asp = 0.6,
  fig.width = 6,
  warning = FALSE,
  echo = TRUE
)

# Make the minimal theme the session default, with legends at the bottom.
theme_set(
  theme_minimal() +
    theme(legend.position = "bottom")
)

# Default scale type for continuous colour and fill aesthetics: viridis.
# The fill option was previously misspelled "virids", which is not a scale
# type ggplot2 recognises, so plots with a continuous fill would fail.
options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis"
)

# Shadow ggplot2's default discrete colour/fill scales with the viridis
# discrete scales so that unscaled discrete aesthetics pick them up.
scale_fill_discrete <- scale_fill_viridis_d
scale_colour_discrete <- scale_colour_viridis_d
# Years whose files are read with the first column spec below.
years_1 <- c(1900:2012, 2014)
# Years whose files are read with the second (wider) column spec below.
years_2 <- c(2015:2019)

# Read one results file, choosing the column spec from the year in its path.
#
# x: path to a CSV file; the year is detected anywhere in the path string.
#
# Returns the parsed table, with "NULL", "" and "0" treated as missing.
# If the path mentions no year from `years_1` or `years_2`, nothing is read
# and NULL is returned invisibly.
importing_data <- function(x) {
  na_tokens <- c("NULL", "", "0")

  # TRUE when the path contains any of the given years.
  path_mentions <- function(years) {
    str_detect(x, str_c(years, collapse = "|"))
  }

  if (path_mentions(years_1)) {
    return(read_csv(x, na = na_tokens, col_types = "cicccciiiicc"))
  }

  if (path_mentions(years_2)) {
    return(
      read_csv(x, na = na_tokens, col_types = "cccicccccccccccccccccciiiiccc")
    )
  }

  invisible(NULL)
}

# Read every file under data/, stack the per-file tables into one data frame,
# and tidy it:
#   - year comes from the number embedded in the file path,
#   - city falls back to residence where city is missing,
#   - non-alphanumeric characters in display_name become spaces,
#   - rows without a display_name are dropped.
boston_df <-
  tibble(file_name = list.files("data", full.names = TRUE)) %>%
  mutate(data = map(file_name, importing_data)) %>%
  unnest(data) %>%
  mutate(
    year = readr::parse_number(file_name),
    city = coalesce(city, residence),
    display_name = str_replace_all(display_name, "[^a-zA-Z0-9]", " ")
  ) %>%
  filter(!is.na(display_name)) %>%
  select(-c(file_name, residence))

Do you need to load hms separately, or is it part of the tidyverse?

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:hms':
## 
##     hms
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Convert columns to analysis-friendly types: both time columns become hms
# values, year becomes a factor, and place_overall becomes numeric.
winners_df <-
  boston_df %>%
  mutate(
    across(c(official_time, pace), as_hms),
    year = as.factor(year),
    place_overall = as.numeric(place_overall)
  )

Make winners over time plot

# Official time by year for rows with overall == 1 (presumably the race
# winner -- confirm against the data), drawn as points joined by a path.
# group = 1 makes the path connect across the discrete year axis; x-axis
# labels are limited to one per decade.
winners_df %>%
  filter(overall == 1) %>%
  arrange(year) %>%
  ggplot(aes(x = year, y = official_time, group = 1)) +
  geom_point() +
  geom_path() +
  scale_x_discrete(breaks = seq(1910, 2020, by = 10))

Errors in the data: ~1:20:00 is the fastest time here, but 1:59 is the fastest marathon ever run – although it was not officially recorded – https://www.nytimes.com/2019/10/12/sports/eliud-kipchoge-marathon-record.html

Plotly

Try with seconds – link the official time - specify range on y-axis

# Interactive line plot of official time by year for rows with overall == 1,
# with the runner's name attached as text.
# NOTE(review): categoryarray is taken from the full, unfiltered winners_df
# column -- confirm this is the intended y-axis ordering.
winners_df %>%
  filter(overall == 1) %>%
  plot_ly(
    type = "scatter",
    mode = "lines",
    text = ~ paste("Name: ", display_name)
  ) %>%
  add_trace(x = ~year, y = ~official_time) %>%
  layout(
    yaxis = list(
      categoryorder = "array",
      categoryarray = winners_df$official_time
    )
  )

layout(yaxis = list(categoryorder = "total ascending"))

Marathon records

# Download the marathon world-record progression page.
records_html <-
  read_html("https://www.topendsports.com/sport/athletics/record-marathon.htm")

# Parse every <table> on the page and give each one the same five column
# names. The result is a list with one data frame per table.
# NOTE(review): setNames() fails if a table does not have exactly five
# columns -- confirm the page layout still matches.
record_marathon <-
  records_html %>%
  html_nodes("table") %>%
  html_table(fill = TRUE) %>%
  lapply(setNames, c("time", "date", "athlete", "country", "marathon"))

# Flatten the scraped tables into one data frame, parse the record time as
# hms, and keep only the year from the date column. separate() splits the
# date into three pieces; the NA entries in `into` discard the first two.
marathon <-
  record_marathon %>%
  as.data.frame() %>%
  mutate(time = as_hms(time)) %>%
  separate(date, into = c(NA, NA, "year")) %>%
  mutate(year = as.numeric(year))

# Interactive line plot of the marathon record time by year, with the
# athlete's name attached as text.
# The y-axis category array now comes from this plot's own data
# (marathon$time). It previously pointed at winners_df$official_time, a
# different data set, which looks like a copy-paste from the Boston plot.
marathon %>%
  plot_ly(
    type = "scatter",
    mode = "lines",
    text = ~ paste("Name: ", athlete)
  ) %>%
  add_trace(x = ~year, y = ~time) %>%
  layout(
    yaxis = list(
      categoryorder = "array",
      categoryarray = marathon$time
    )
  )
library(purrr)
library(lubridate)

library(lubridate)
trial = tibble(date = c("1:20:02"))

trial %>% mutate( date = hms(date) )

age x year (intervals?) pace x year plot? Boston winner compared to record winner overall -